# Percentages for each category / factor level

# Load the sample data: a tab-delimited file with a header row.
mydataframe <- read.delim(
  file = "http://www.robin-beaumont.co.uk/virtualclassroom/stats/basics/coursework/data/pain_medication.dat",
  header = TRUE
)

# Local copy (author's machine only; ignore):
# mydataframe <- read.delim("D:\\web_sites_mine\\HIcourseweb new\\book2data\\pain_medication.dat", header=TRUE)
# names(mydataframe)

# Show the structure (column names and types), then print the data itself.
str(mydataframe)
mydataframe

# Frequency table for dosage. table() drops missing values (NA) by default,
# so these counts cover the non-missing observations only.
freq_dosagetable <- table(mydataframe$dosage)
freq_dosagetable
# cumsum() provides the running (cumulative) count across the levels
cum_freq <- cumsum(freq_dosagetable)
cum_freq
# Also need the total number of observations. Summing the table (rather
# than calling length() on the column) keeps the denominator consistent
# with the counts above when the column contains NA, so the relative
# frequencies still sum to 1. With no NA values both give the same number.
totalcount <- sum(freq_dosagetable)
totalcount
# The relative frequency is the count,
# i.e. freq_dosagetable, divided by the total
rel_freq <- freq_dosagetable / totalcount
rel_freq
# and the cumulative relative frequency
# is the cumulative count divided by the total
cum_rel_freq <- cum_freq / totalcount
cum_rel_freq

######### to get the results into a data frame use the plyr library
# can also do multiple categories
# Install plyr only when it is not already available, so re-running the
# script does not download and reinstall the package every time.
if (!requireNamespace("plyr", quietly = TRUE)) {
  install.packages("plyr", dependencies = TRUE)
}
library(plyr)

# One row per dosage x health combination, with its frequency
count(mydataframe, c("dosage", "health"))
# produces
#  dosage health freq
#1   High   Fair   34
#2   High   Good   42
#3   High   Poor   12
#4    Low   Fair   47
#5    Low   Good   49
#6    Low   Poor   16
#########

# Three-way breakdown: one row per dosage x health x gender combination
count(
  mydataframe,
  vars = c("dosage", "health", "gender")
)
# produces
#  dosage health gender freq
#1    High   Fair Female   14
#2    High   Fair   Male   20
#3    High   Good Female   20
#4    High   Good   Male   22
#5    High   Poor Female    8
#6    High   Poor   Male    4
#7     Low   Fair Female   23
#8     Low   Fair   Male   24
#9     Low   Good Female   25
#10    Low   Good   Male   24
#11    Low   Poor Female   11
#12    Low   Poor   Male    5